{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from google.cloud import storage\n",
    "\n",
    "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../t5/prepare/mesolitica-tpu.json'\n",
    "client = storage.Client()\n",
    "bucket = client.bucket('mesolitica-tpu-general')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import tensorflow_datasets as tfds\n",
    "from t5.data import preprocessors as prep\n",
    "import functools\n",
    "import t5\n",
    "import gin\n",
    "import sentencepiece as spm\n",
    "from glob import glob\n",
    "import os\n",
    "from tensor2tensor.data_generators import problem\n",
    "from tensor2tensor.data_generators import text_problems\n",
    "from tensor2tensor.utils import registry\n",
    "\n",
    "gin.parse_config_file('pretrained_models_base_operative_config.gin')\n",
    "vocab = 'sp10m.cased.t5.model'\n",
    "sp = spm.SentencePieceProcessor()\n",
    "sp.Load(vocab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import sentencepiece as spm\n",
    "# vocab = 'sp10m.cased.t5.model'\n",
    "# sp = spm.SentencePieceProcessor()\n",
    "# sp.Load(vocab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def cnn_dataset(split, shuffle_files = False):\n",
    "    del shuffle_files\n",
    "    ds = tf.data.TextLineDataset(glob('t5-data/cnn-summarization-*.tsv'))\n",
    "\n",
    "    ds = ds.map(\n",
    "        functools.partial(\n",
    "            tf.io.decode_csv,\n",
    "            record_defaults = ['', ''],\n",
    "            field_delim = '\\t',\n",
    "            use_quote_delim = False,\n",
    "        ),\n",
    "        num_parallel_calls = tf.data.experimental.AUTOTUNE,\n",
    "    )\n",
    "    ds = ds.map(lambda *ex: dict(zip(['question', 'answer'], ex)))\n",
    "    return ds\n",
    "\n",
    "\n",
    "def cnn_preprocessor(ds):\n",
    "    def to_inputs_and_targets(ex):\n",
    "        return {\n",
    "            'inputs': tf.strings.join(['ringkasan: ', ex['question']]),\n",
    "            'targets': ex['answer'],\n",
    "        }\n",
    "\n",
    "    return ds.map(\n",
    "        to_inputs_and_targets,\n",
    "        num_parallel_calls = tf.data.experimental.AUTOTUNE,\n",
    "    )\n",
    "\n",
    "\n",
    "t5.data.TaskRegistry.remove('cnn_dataset')\n",
    "t5.data.TaskRegistry.add(\n",
    "    'cnn_dataset',\n",
    "    dataset_fn = cnn_dataset,\n",
    "    splits = ['train'],\n",
    "    text_preprocessor = [cnn_preprocessor],\n",
    "    sentencepiece_model_path = vocab,\n",
    "    metric_fns = [t5.evaluation.metrics.accuracy],\n",
    ")\n",
    "\n",
    "\n",
    "def multinews_dataset(split, shuffle_files = False):\n",
    "    del shuffle_files\n",
    "    ds = tf.data.TextLineDataset(glob('t5-data/multinews-summarization-*.tsv'))\n",
    "\n",
    "    ds = ds.map(\n",
    "        functools.partial(\n",
    "            tf.io.decode_csv,\n",
    "            record_defaults = ['', ''],\n",
    "            field_delim = '\\t',\n",
    "            use_quote_delim = False,\n",
    "        ),\n",
    "        num_parallel_calls = tf.data.experimental.AUTOTUNE,\n",
    "    )\n",
    "    ds = ds.map(lambda *ex: dict(zip(['question', 'answer'], ex)))\n",
    "    return ds\n",
    "\n",
    "\n",
    "def multinews_preprocessor(ds):\n",
    "    def to_inputs_and_targets(ex):\n",
    "        return {\n",
    "            'inputs': tf.strings.join(['ringkasan: ', ex['question']]),\n",
    "            'targets': ex['answer'],\n",
    "        }\n",
    "\n",
    "    return ds.map(\n",
    "        to_inputs_and_targets,\n",
    "        num_parallel_calls = tf.data.experimental.AUTOTUNE,\n",
    "    )\n",
    "\n",
    "\n",
    "t5.data.TaskRegistry.remove('multinews_dataset')\n",
    "t5.data.TaskRegistry.add(\n",
    "    'multinews_dataset',\n",
    "    dataset_fn = multinews_dataset,\n",
    "    splits = ['train'],\n",
    "    text_preprocessor = [multinews_preprocessor],\n",
    "    sentencepiece_model_path = vocab,\n",
    "    metric_fns = [t5.evaluation.metrics.accuracy],\n",
    ")\n",
    "\n",
    "def news_dataset(split, shuffle_files = False):\n",
    "    del shuffle_files\n",
    "    ds = tf.data.TextLineDataset(glob('t5-data/news-title-*.tsv'))\n",
    "\n",
    "    ds = ds.map(\n",
    "        functools.partial(\n",
    "            tf.io.decode_csv,\n",
    "            record_defaults = ['', ''],\n",
    "            field_delim = '\\t',\n",
    "            use_quote_delim = False,\n",
    "        ),\n",
    "        num_parallel_calls = tf.data.experimental.AUTOTUNE,\n",
    "    )\n",
    "    ds = ds.map(lambda *ex: dict(zip(['question', 'answer'], ex)))\n",
    "    return ds\n",
    "\n",
    "\n",
    "def news_preprocessor(ds):\n",
    "    def to_inputs_and_targets(ex):\n",
    "        return {\n",
    "            'inputs': tf.strings.join(['tajuk: ', ex['question']]),\n",
    "            'targets': ex['answer'],\n",
    "        }\n",
    "\n",
    "    return ds.map(\n",
    "        to_inputs_and_targets,\n",
    "        num_parallel_calls = tf.data.experimental.AUTOTUNE,\n",
    "    )\n",
    "\n",
    "\n",
    "t5.data.TaskRegistry.remove('news_dataset')\n",
    "t5.data.TaskRegistry.add(\n",
    "    'news_dataset',\n",
    "    dataset_fn = news_dataset,\n",
    "    splits = ['train'],\n",
    "    text_preprocessor = [news_preprocessor],\n",
    "    sentencepiece_model_path = vocab,\n",
    "    metric_fns = [t5.evaluation.metrics.accuracy],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "@registry.register_problem\n",
    "class Seq2Seq(text_problems.Text2TextProblem):\n",
    "\n",
    "    @property\n",
    "    def approx_vocab_size(self):\n",
    "        return 32100\n",
    "    \n",
    "    @property\n",
    "    def is_generate_per_split(self):\n",
    "        return False\n",
    "    \n",
    "    @property\n",
    "    def dataset_splits(self):\n",
    "        return [{\n",
    "            \"split\": problem.DatasetSplit.TRAIN,\n",
    "            \"shards\": 100,\n",
    "        }]\n",
    "    \n",
    "    def generate_samples(self, data_dir, tmp_dir, dataset_split):\n",
    "        del data_dir\n",
    "        del tmp_dir\n",
    "        del dataset_split\n",
    "        \n",
    "        nq_task = t5.data.TaskRegistry.get(\"cnn_dataset\")\n",
    "        ds = nq_task.get_dataset(split='qa.tsv', sequence_length={\"inputs\": 1024, \"targets\": 1024})\n",
    "        \n",
    "        for ex in tqdm(tfds.as_numpy(ds)):\n",
    "            yield ex\n",
    "            \n",
    "        nq_task = t5.data.TaskRegistry.get(\"multinews_dataset\")\n",
    "        ds = nq_task.get_dataset(split='qa.tsv', sequence_length={\"inputs\": 1024, \"targets\": 1024})\n",
    "        \n",
    "        for ex in tqdm(tfds.as_numpy(ds)):\n",
    "            yield ex\n",
    "        \n",
    "        nq_task = t5.data.TaskRegistry.get(\"news_dataset\")\n",
    "        ds = nq_task.get_dataset(split='qa.tsv', sequence_length={\"inputs\": 768, \"targets\": 1024})\n",
    "        \n",
    "        for ex in tqdm(tfds.as_numpy(ds)):\n",
    "            if len(ex['targets']) > 4:\n",
    "                yield ex\n",
    "                    \n",
    "    def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):\n",
    "        \n",
    "        generator = self.generate_samples(data_dir, tmp_dir, dataset_split)\n",
    "        for sample in generator:\n",
    "            sample[\"inputs\"] = sample['inputs'].tolist()\n",
    "            sample[\"targets\"] = sample['targets'].tolist()\n",
    "            yield sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "!rm -rf t2t-summarization/data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_DIR = os.path.expanduser(\"t2t-summarization/data\")\n",
    "TMP_DIR = os.path.expanduser(\"t2t-summarization/tmp\")\n",
    " \n",
    "tf.gfile.MakeDirs(DATA_DIR)\n",
    "tf.gfile.MakeDirs(TMP_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "0it [00:00, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 0.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 0.\n",
      "99979it [02:56, 578.06it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 100000.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 100000.\n",
      "140382it [04:07, 566.80it/s]\n",
      "59598it [04:58, 272.20it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 200000.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 200000.\n",
      "101574it [08:14, 205.43it/s]\n",
      "61616it [01:27, 852.31it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 300000.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 300000.\n",
      "168167it [03:53, 705.28it/s] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 400000.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 400000.\n",
      "274170it [06:14, 591.92it/s] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 500000.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generating case 500000.\n",
      "293443it [06:40, 731.99it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Generated 518093 Examples\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "INFO:tensorflow:Generated 518093 Examples\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Shuffling data...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Shuffling data...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/tensor2tensor-1.15.7-py3.6.egg/tensor2tensor/data_generators/generator_utils.py:477: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use eager execution and: \n",
      "`tf.data.TFRecordDataset(path)`\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/tensor2tensor-1.15.7-py3.6.egg/tensor2tensor/data_generators/generator_utils.py:477: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use eager execution and: \n",
      "`tf.data.TFRecordDataset(path)`\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Data shuffled.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Data shuffled.\n"
     ]
    }
   ],
   "source": [
    "from tensor2tensor.utils import registry\n",
    "from tensor2tensor import problems\n",
    "\n",
    "PROBLEM = 'seq2_seq'\n",
    "t2t_problem = problems.problem(PROBLEM)\n",
    "t2t_problem.generate_data(DATA_DIR, TMP_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['t2t-summarization/data/seq2_seq-train-00022-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00017-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00054-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00008-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00014-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00057-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00036-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00038-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00012-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00037-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00053-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00065-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00089-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00046-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00030-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00084-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00074-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00049-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00064-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00006-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00045-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00043-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00056-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00016-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00019-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00058-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00055-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00063-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00090-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00031-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00093-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00095-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00000-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00010-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00070-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00073-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00032-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00039-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00094-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00015-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00075-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00072-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00041-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00048-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00086-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00040-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00085-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00051-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00061-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00076-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00050-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00077-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00024-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00018-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00069-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00034-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00002-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00023-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00052-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00083-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00080-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00062-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00004-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00092-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00033-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00081-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00082-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00060-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00068-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00087-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00009-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00013-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00066-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00007-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00011-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00035-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00091-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00097-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00020-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00005-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00098-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00047-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00096-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00029-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00099-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00044-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00088-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00021-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00079-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00078-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00025-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00027-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00071-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00059-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00003-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00028-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00067-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00001-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00042-of-00100',\n",
       " 't2t-summarization/data/seq2_seq-train-00026-of-00100']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from glob import glob\n",
    "\n",
    "files = glob('t2t-summarization/data/*')\n",
    "files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "t2t-summarization/data/seq2_seq-train-00022-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00017-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00054-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00008-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00014-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00057-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00036-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00038-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00012-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00037-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00053-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00065-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00089-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00046-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00030-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00084-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00074-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00049-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00064-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00006-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00045-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00043-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00056-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00016-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00019-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00058-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00055-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00063-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00090-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00031-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00093-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00095-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00000-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00010-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00070-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00073-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00032-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00039-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00094-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00015-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00075-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00072-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00041-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00048-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00086-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00040-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00085-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00051-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00061-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00076-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00050-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00077-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00024-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00018-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00069-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00034-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00002-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00023-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00052-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00083-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00080-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00062-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00004-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00092-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00033-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00081-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00082-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00060-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00068-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00087-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00009-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00013-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00066-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00007-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00011-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00035-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00091-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00097-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00020-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00005-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00098-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00047-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00096-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00029-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00099-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00044-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00088-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00021-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00079-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00078-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00025-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00027-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00071-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00059-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00003-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00028-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00067-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00001-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00042-of-00100\n",
      "t2t-summarization/data/seq2_seq-train-00026-of-00100\n"
     ]
    }
   ],
   "source": [
    "for file in files:\n",
    "    print(file)\n",
    "    blob = bucket.blob(file)\n",
    "    blob.upload_from_filename(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
